

% Shared problem definition, published through global variables.
% This script assigns ITERMAX, NA, NS, THETA, SMALL, TPM and TRM.
%
% NOTE(review): NO_REPLICATIONS, D_STATE and D_ACTION are declared global
% below but are NOT assigned anywhere in this script. D_STATE and D_ACTION
% are meant to define the distinguished state-action pair; confirm that all
% three are set elsewhere before they are read.
global ITERMAX NA NS THETA SMALL
global TPM TRM
global NO_REPLICATIONS D_STATE D_ACTION

% --- Scalar settings -----------------------------------------------------
ITERMAX = 100000;   % No. of iterations of learning
NA      = 2;        % Number of actions in each state
NS      = 5;        % Number of states
THETA   = 0.15;     % NOTE(review): meaning not stated in this script -- confirm with its users
SMALL   = -1000000; % Large negative constant (presumably a "minus infinity" seed for max searches -- confirm)

% --- Matrices ------------------------------------------------------------
% Both arrays are NS-by-NS-by-NA; the third index selects the action.
% Pre-allocating with NS/NA means a literal of the wrong size below raises
% a dimension-mismatch error instead of silently resizing the array.
TPM = zeros(NS, NS, NA);
TRM = zeros(NS, NS, NA);

% TPM: every row of every page sums to 1, so TPM(i,j,a) is presumably the
% probability of moving from state i to state j under action a.
TPM(:,:,1) = [ 0.25, 0.25, 0.10, 0.10, 0.30; ...
               0.25, 0.05, 0.20, 0.00, 0.50; ...
               0.30, 0.20, 0.10, 0.10, 0.30; ...
               0.05, 0.15, 0.00, 0.30, 0.50; ...
               0.10, 0.15, 0.10, 0.45, 0.20 ];

TPM(:,:,2) = [ 0.30, 0.10, 0.10, 0.25, 0.25; ...
               0.15, 0.15, 0.20, 0.00, 0.50; ...
               0.10, 0.40, 0.10, 0.10, 0.30; ...
               0.15, 0.50, 0.00, 0.10, 0.25; ...
               0.20, 0.15, 0.10, 0.45, 0.10 ];

% TRM: same layout as TPM; presumably TRM(i,j,a) is the reward earned on
% the i -> j transition under action a (inferred from the name -- confirm).
TRM(:,:,1) = [  6,  -5, -10,  11,  11; ...
               16,   5,  10, -14,  13; ...
                8,  10,  -1,  -2,   1; ...
                7,  -1,  10,  10,  15; ...
                0,  -4,  12,  17,   0 ];

TRM(:,:,2) = [ -6,  -8, -11,  12,   1; ...
                2,   4,  -6,  -8,  -8; ...
                8,   8,  -9,  -1,  -9; ...
                0,   3,  14,  21,  11; ...
               15,  21,  -8,   1,  15 ];







